Introduction

library(neotoma2)
library(tidyverse)
library(DT)
library(geojsonsf)
library(sf)
library(leaflet)

Getting Data

See this other tutorial for …

tanganyikaSites = get_sites(sitename = "%Tanganyika%")


plotLeaflet(tanganyikaSites)

Need to be careful because the sites appear as points there, but may not actually be points in the metadata

API - site metadata

library(jsonlite)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
## The following object is masked from 'package:neotoma2':
## 
##     toJSON
library(httr)

siteidString = paste0(tanganyikaSites$siteid,collapse=",")

apiCall = paste0('https://api.neotomadb.org/v2.0/data/sites/',siteidString,'/datasets')


response = GET(apiCall)

siteMetadata = content(response)$data


siteMetadata[[1]]$site[1:5]
## $siteid
## [1] 27348
## 
## $sitename
## [1] "Lake Tanganyika"
## 
## $sitedescription
## [1] "Open water offshore of coast, Lake Tanganyika is permanently anoxic below 150 m depth and is considered an oligotrophic system."
## 
## $sitenotes
## NULL
## 
## $geography
## [1] "{\"type\":\"Polygon\",\"crs\":{\"type\":\"name\",\"properties\":{\"name\":\"EPSG:4326\"}},\"coordinates\":[[[29.12625,-8.80904],[29.12625,-3.37081],[31.12576,-3.37081],[31.12576,-8.80904],[29.12625,-8.80904]]]}"

The structure is like this… but hard to look at this way, let’s turn it into a dataframe - only selecting the first chronology and only selecting the first dataset pi’s name

idx = 0

for (i in seq(length(siteMetadata))) {
  for (j in seq(length(siteMetadata[[i]]$site$datasets))) {
    idx = idx + 1
  }
}


siteMetadata_mat = matrix(nrow=idx,ncol=19)

idx2 = 0
for (i in seq(length(siteMetadata))) {
  for (j in seq(length(siteMetadata[[i]]$site$datasets))) {
    idx2 = idx2 + 1
    for (k in seq(10)) {
      if (!is.null(siteMetadata[[i]]$site[[k]])) {
        siteMetadata_mat[[idx2, k]] = siteMetadata[[i]]$site[[k]]
      }
    }
    
     if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$doi[[1]])) {
        siteMetadata_mat[[idx2, 11]] = siteMetadata[[i]]$site$datasets[[j]]$doi[[1]]
     }
    
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[1]])) {
        siteMetadata_mat[[idx2, 12]] = siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[1]]
    }
    
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[2]])) {
        siteMetadata_mat[[idx2, 13]] = siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[2]]
      }
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[3]])) {
        siteMetadata_mat[[idx2, 14]] = siteMetadata[[i]]$site$datasets[[j]]$agerange[[1]][[3]]
    }
    
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$database)) {
        siteMetadata_mat[[idx2, 15]] = siteMetadata[[i]]$site$datasets[[j]]$database
    }
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasetid)) {
        siteMetadata_mat[[idx2, 16]] = siteMetadata[[i]]$site$datasets[[j]]$datasetid
    }
    
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasetpi[[1]]$contactname)) {
        siteMetadata_mat[[idx2, 17]] = siteMetadata[[i]]$site$datasets[[j]]$datasetpi[[1]]$contactname
    }
    
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasettype)) {
        siteMetadata_mat[[idx2, 18]] = siteMetadata[[i]]$site$datasets[[j]]$datasettype
    }
    
    if (!is.null(siteMetadata[[i]]$site$datasets[[j]]$datasetnotes)) {
        siteMetadata_mat[[idx2, 19]] = siteMetadata[[i]]$site$datasets[[j]]$datasetnotes
    }
  }
}


siteMetadata_df = as.data.frame(siteMetadata_mat)

names(siteMetadata_df) = c("siteid","sitename","sitedescription","sitenotes","geography","altitude","collectionunitid","collectionunit","handle","unittype","dataset_doi","dataset_agerange_units","dataset_ageold","dataset_ageyoung","database","datasetid","datasetpi","datasettype","datasetnotes")

#just first ten cols
datatable(siteMetadata_df[1:10],rownames=FALSE)

Ok, it was kinda a lot of work to get that table together. (And notice that one of these datasets is in the Alaska Archaeofaunas database - a good reminder that these metadata can have errors !) Now let’s look at the geography again - you can see there are some polygons

siteGeo_sf = geojson_sf(siteMetadata_df$geography)

siteMetadata_sf = cbind(siteGeo_sf,siteMetadata_df)

pointSites = siteMetadata_sf[st_geometry_type(siteMetadata_sf) == "POINT",]
polySites = siteMetadata_sf[st_geometry_type(siteMetadata_sf) == "POLYGON",]

leaflet() %>%
  addTiles() %>%
  addPolygons(data = polySites, 
              color = "red", 
              weight = 2, 
              fillColor = "orange", 
              fillOpacity = 0.05, 
              popup = ~sitename)  %>%
  addCircleMarkers(data = pointSites, 
                   radius = 5, 
                   color = "blue", 
                   fillColor = "blue", 
                   fillOpacity = 0.7, 
                   stroke = FALSE, 
                   popup = ~sitename)

So we’ve learned that the site isn’t the right level at which to be careful about pollen core location. This isn’t necessarily important if you’re doing a regional- or continental-scale synthesis (arguably, the more natural uses of Neotoma) - but if you’re focused in on site-level dynamics of your core, soil interactions, etc, then it’s very important to pay attention to this kind of thing.

So, where do we find the information about core location?? It turns out we need to download the entire collection-units table ! And then filter for the collection units we care about

text="collectionunits"
collunits = content(GET(paste0('https://api.neotomadb.org/v2.0/data/dbtables/',text,'?count=false&limit=99999&offset=0')))$data



collunit_mat = matrix(nrow=length(collunits),ncol=20)

for (i in seq(length(collunits))) {
  for (j in seq(20)) {
    if (!is.null(collunits[[i]][[j]])) {
      collunit_mat[[i,j]] = collunits[[i]][[j]]
    }
  }
}

collunit_df = collunit_mat %>% as.data.frame()

names(collunit_df) = c("collectionunitid","handle","siteid","colltypeid","depenvtid","collunitname","colldate","colldevice","gpslatitude","gpslongitude","gpsaltitude","gpserror","waterdepth","substrateid","slopeaspect","slopeangle","location","notes","recdaterecreated","recdatemodified")

filteredColl_sf = collunit_df %>% dplyr::filter(collectionunitid %in% siteMetadata_df$collectionunitid) %>% st_as_sf(coords=c("gpslongitude","gpslatitude"), crs="WGS84")

#just first ten cols
datatable(collunit_df[c(1:3,5:10,17)],rownames=FALSE)
leaflet() %>%
  addTiles() %>%
  addPolygons(data = polySites, 
              color = "red", 
              weight = 2, 
              fillColor = "orange", 
              fillOpacity = 0.05, 
              popup = ~siteid)  %>%
  addCircleMarkers(data = pointSites, 
                   radius = 5, 
                   color = "blue", 
                   fillColor = "blue", 
                   fillOpacity = 0.7, 
                   stroke = FALSE, 
                   popup = ~siteid)   %>% 
  addCircleMarkers(data = filteredColl_sf, 
                   radius = 3, 
                   color = "green", 
                   fillColor = "green", 
                   fillOpacity = 0.7, 
                   stroke = FALSE, 
                   popup = ~siteid)

Even the site that was a point isn’t where the cores are actually from…